/***
This do-file creates a figure that shows the share of job losses occurring in
low vs. high income counties during the Great Recession and the COVID Recession.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Create required subfolders
local category "Employment"
cap mkdir "${root}/results/`category'"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"

*-------------------------------------------------------------------------------
* Define programs
*-------------------------------------------------------------------------------

cap program drop quantile_shares
program define quantile_shares
	syntax varlist(numeric max=2 min=2), lf(varname) [nq(integer 4)]
	marksample touse
	markout `touse' `lf'

	local yvar: word 1 of `varlist'
	local xvar: word 2 of `varlist'

	qui xtile q_`xvar' = `xvar' [aw=`lf'], nq(`nq')

	if (substr("`xvar'", 1, 10) == "poor_share") replace q_`xvar' = `nq' + 1 - q_`xvar'

	forvalues q=1/`nq' {
		sum `yvar' if q_`xvar'==`q' & `touse' [aw=`lf']
		local ur_q`q' = string(r(mean), "%12.1f")
		local ur_q`q'_precise = r(mean)
	}

	preserve
		clear
		set obs `nq'
		gen quantile = .
		gen bar      = .
		forv q = 1/`nq' {
			replace quantile = `q'        if _n == `q'
			replace bar      = `ur_q`q'_precise' if _n == `q'
		}
		su bar
		gen bar_share = bar / `r(sum)'
		save `yvar'_hist, replace
	restore

	local sum_ur_notq1 = 0
	forvalues q=2/`nq' {
		local sum_ur_notq1 = `sum_ur_notq1' + `ur_q`q'_precise'
	}

	local ur_not_q1 = `sum_ur_notq1' / (`nq' - 1)
	local ur_not_q1 = `ur_q4_precise'
	local q1_pct_higher = string((`ur_q1_precise'/`ur_not_q1' - 1) * 100, "%12.0f")

	if (`nq'==4) local quantile quartile
	else if (`nq'==10) local quantile decile
	else local quantile quantile

	forvalues q=1/`nq' {
		di as text "`description' in `quantile' `q' of `xvar': `ur_q`q''%"
	}

	if (round(`q1_pct_higher')>=0) di "People in the bottom `quantile' of `xvar' were `q1_pct_higher'% more likely to lose their jobs than people in the top quartile."
	else di as text "People in the bottom `quantile' of `xvar' were `=-1*`q1_pct_higher''% {bf:less} likely to lose their jobs than people in the top quartile."
	drop q_`xvar'
end

*-------------------------------------------------------------------------------
* Load and process data
*-------------------------------------------------------------------------------

project, uses("${root}/data/derived/BLS/county_employment_crisis.dta")
use "${root}/data/derived/BLS/county_employment_crisis.dta", clear

* Get median household income in 2014-2018
preserve
	project, uses("${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta")
	use "${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta", clear

	rename (county_fips medhhinc_2014_2018_est) (countyfips med_hhinc_2014to2018)
	keep countyfips med_hhinc_2014to2018

	tempfile med_hhinc_20142018
	save `med_hhinc_20142018'

restore

* Get median household income in 2006
preserve
	project, uses("${root}/data/dvc/SAIPE/est06all.xls")
	import excel "${root}/data/dvc/SAIPE/est06all.xls", cellrange(A4:AE3196) clear allstring
	keep A B W
	rename (A B W) (statefips countyfips med_hhinc_2006)

	* Drop state estimates
	drop if countyfips == "0"

	* Correct countyfips variable
	gen countyfips2 = "00" + countyfips if strlen(countyfips) == 1
	replace countyfips2 = "0" + countyfips if strlen(countyfips) == 2
	replace countyfips2 = countyfips if strlen(countyfips) == 3
	destring statefips, replace
	tostring statefips, replace
	drop countyfips
	gen countyfips = statefips + countyfips2
	drop countyfips2
	drop statefips

	destring countyfips, replace

	destring med_hhinc_2006, replace

	tempfile med_hhinc_2006
	save `med_hhinc_2006'

restore

* Merge median household income in 2006
merge 1:1 countyfips using `med_hhinc_2006', keep(1 3) nogen

* Merge median household income in 2014-2018
merge 1:1 countyfips using `med_hhinc_20142018', keep(1 3) nogen

isid countyfips

*-------------------------------------------------------------------------------
* Construct variables
*-------------------------------------------------------------------------------

* COVID recession
gen perc_emp_loss_febtoapr2020 = (employed_apr2020 - employed_feb2020) / employed_feb2020 * -100
gen perc_emp_loss_jantoapr2020 = (employed_apr2020 - employed_jan2020) / employed_jan2020 * -100
sum perc_emp_loss_febtoapr2020 [w=labor_force_feb2020], d
gen d_unemp_rate_febtoapr2020 = unemp_rate_apr2020 - unemp_rate_feb2020
sum d_unemp_rate_febtoapr2020 [w=labor_force_feb2020], d
gen perc_lf_loss_febtoapr2020 = (labor_force_apr2020 - labor_force_feb2020) / labor_force_feb2020 * -100
sum perc_lf_loss_febtoapr2020 [w=labor_force_feb2020], d
* Great Recession
gen perc_emp_loss_2007to2010 = (employed_10 - employed_07) / employed_07 * -100
sum perc_emp_loss_2007to2010 [w=labor_force_07], d
gen d_unemp_rate_2007to2010 = unemp_rate_10 - unemp_rate_07
sum d_unemp_rate_2007to2010 [w=labor_force_07], d
gen perc_lf_loss_2007to2010 = (labor_force_10 - labor_force_07) / labor_force_07 * -100
sum perc_lf_loss_2007to2010 [w=labor_force_07], d
* 2001 Recession
gen perc_emp_loss_2000to2001 = (employed_01 - employed_00) / employed_00 * -100
sum perc_emp_loss_2000to2001 [w=labor_force_00], d
gen d_unemp_rate_2000to2001 = unemp_rate_01 - unemp_rate_00
sum d_unemp_rate_2000to2001 [w=labor_force_00], d
gen perc_lf_loss_2000to2001 = (labor_force_01 - labor_force_00) / labor_force_00 * -100
sum perc_lf_loss_2000to2001 [w=labor_force_00], d
* 1991 Recession
gen perc_emp_loss_1990to1991 = (employed_91 - employed_90) / employed_90 * -100
sum perc_emp_loss_1990to1991 [w=labor_force_90], d
gen d_unemp_rate_1990to1991 = unemp_rate_91 - unemp_rate_90
sum d_unemp_rate_1990to1991 [w=labor_force_90], d
gen perc_lf_loss_1990to1991 = (labor_force_91 - labor_force_90) / labor_force_90 * -100
sum perc_lf_loss_1990to1991 [w=labor_force_90], d
*** Produce numbers
keep countyfips labor_force_07 labor_force_feb2020 labor_force_jan2020 perc_emp_loss_febtoapr2020 perc_emp_loss_jantoapr2020 perc_emp_loss_2007to2010 med_hhinc_2014to2018 med_hhinc_2006
xtile quartiles_2006 = med_hhinc_2006 [aw = labor_force_07], nq(4)
xtile quartiles_20142018 = med_hhinc_2014to2018 [aw = labor_force_jan2020], nq(4)
preserve
	quantile_shares perc_emp_loss_jantoapr2020 med_hhinc_2014to2018, lf(labor_force_jan2020) nq(4)
	use perc_emp_loss_jantoapr2020_hist, clear
	gen recession = "2020_er"
	tempfile apr2020
	save `apr2020'
restore
preserve
	quantile_shares perc_emp_loss_2007to2010 med_hhinc_2006, lf(labor_force_07) nq(4)
	use perc_emp_loss_2007to2010_hist, clear
	gen recession = "2010"
	tempfile rec2010
	save `rec2010'
restore

*-------------------------------------------------------------------------------
* Import UI Claims data
*-------------------------------------------------------------------------------

project, uses("${root}/data/web/data/UI Claims - County - Weekly.csv")
import delimited "${root}/data/web/data/UI Claims - County - Weekly.csv", clear

* Create date variable to filter with the relevant period
tostring year month day_endofweek, replace
gen date2 = year + "-" + month + "-" + day_endofweek
gen date = date(date2, "YMD")
drop date2

* Filter weeks from March 15 to April 12. This includes the weeks from the one
* that ended on 2020/3/21 (21995) to the one that ended on 2020/4/11 (22016)
keep if date >= 21995 & date <= 22016

* Sum UI claims in that period by county
collapse (rawsum) initclaims_count_regular, by(countyfips)

* Format countyfips for merge
tostring countyfips, replace

*-------------------------------------------------------------------------------
* Merge in labor force size
*-------------------------------------------------------------------------------
preserve
	project, uses("${root}/data/dvc/BLS/laucnty19.xlsx")
	import excel "${root}/data/dvc/BLS/laucnty19.xlsx", clear
	keep B C G
	rename B statefips
	rename C countyfips
	rename G labor_force
	drop if _n < 7
	destring statefips countyfips labor_force, replace
	replace countyfips = statefips * 1000 + countyfips
	drop if mi(countyfips)
	tostring countyfips, replace
	isid countyfips
	keep countyfips labor_force statefips
	rename labor_force labor_force_2019
	tempfile lf
	save `lf'
restore

merge m:1 countyfips using `lf', assert(2 3) keep(3) nogen

*-------------------------------------------------------------------------------
* Create cumul_initclaimsrate_2020w14
*-------------------------------------------------------------------------------

gen cumul_initclaimsrate_2020w14 = (initclaims_count_regular / labor_force_2019) * 100

destring countyfips, replace

merge 1:1 countyfips using `med_hhinc_20142018', nogen

drop statefips
tostring countyfips, replace
gen statefips = "0" + substr(countyfips, 1,1) if strlen(countyfips) == 4
replace statefips = substr(countyfips, 1, 2) if strlen(countyfips) == 5
destring statefips, replace
destring countyfips, replace

project, uses("${root}/code/ado_ssc/statastates.ado")
project, uses("${root}/code/ado_ssc/statastates_data.ado")
project, uses("${root}/code/ado_ssc/statastates.sthlp") reference
statastates, fips(statefips)
drop _merge

gen cumul_mar15_to_apr12 = cumul_initclaimsrate_2020w14

*-------------------------------------------------------------------------------
* Merge the April 2020 employment data
*-------------------------------------------------------------------------------

preserve

	* Create monthly county level emmployment data from BLS data
	project, uses("${root}/data/dvc/BLS/laucntycur14.txt")
	import delimited "${root}/data/dvc/BLS/laucntycur14.txt", delimiter("|") varnames(6) encoding(ISO-8859-2) colrange(2:9) stringcols(_all) clear
	rename (v*) (state_fips county_fips location period labor_force_size employed unemployed unemployment_rate)
	drop if mi(state_fips) | mi(county_fips)

	* create unique countyfips
	rename county_fips countyfips
	rename state_fips statefips

	destring statefips, replace
	tostring statefips, replace
	replace countyfips = statefips + countyfips
	replace countyfips = subinstr(countyfips, " ", "", .)

	* Create separate variables for county name and state abbreviation
	split location, parse(", ")
	rename (location1 location2) (county_name state_abbrev)

	* Drop extraneous month
	drop if regexm(period, "Dec-18") == 1

	* Format date variable
	drop if period == ""
	replace period = subinstr(period, "-", "", .)
	replace period = lower(subinstr(period, " p", "", .))
	replace period = subinstr(period, "(p)", "", .)
	gen int date = date(period, "M20Y")
	format date %td
	assert ~mi(date)

	* Drop extraneous variables
	drop location period

	* Destring variables
	replace labor_force_size = subinstr(labor_force_size, ",", "", .)
	replace employed = subinstr(employed, ",", "", .)
	replace unemployed = subinstr(unemployed, ",", "", .)
	destring statefips countyfips labor_force_size employed unemployed unemployment_rate, replace

	* Order variables
	order state_abbrev statefips county_name countyfips date

	* Check that observations are unique
	isid statefips countyfips date
	sort statefips countyfips date

	gen year = year(date)
	gen month = month(date)

	drop if state_abbrev=="PR"
	keep if year == 2020 & month == 4
	keep countyfips unemployment_rate
	rename unemployment_rate unemp_rate_apr2020
	isid countyfips
	tempfile apr2020_unemp
	save `apr2020_unemp'
restore

* All the usingdata match, county (Kalawao County, Hawaii) 15005 from master doesn't match.
merge 1:1 countyfips using `apr2020_unemp', assert(1 3)

preserve
	drop if mi(cumul_mar15_to_apr12)
	keep statefips
	bysort statefips: gen counter = _n
	drop if counter > 1
	tempfile states_used
	save `states_used'
	* Import population by state
	project, uses("${root}/data/web/data/GeoIDs - State.csv")
	import delimited "${root}/data/web/data/GeoIDs - State.csv", clear
	drop if statefips == 72
	merge 1:1 statefips using `states_used', keepusing(statefips)
	gegen sum_used = sum(state_pop2019) if _merge == 3
	gegen sum_total = sum(state_pop2019)
	drop if mi(sum_used) | mi(sum_total)
	local ui_coverage_stat = sum_used[1] / sum_total[1]
	di `ui_coverage_stat'
restore

count if _merge == 1
drop _merge
quantile_shares cumul_mar15_to_apr12 med_hhinc_2014to2018, lf(labor_force_2019) nq(4)
use cumul_mar15_to_apr12_hist, clear
gen recession = "2020_ui"
append using `rec2010'
append using `apr2020'
drop bar
reshape wide bar_share, i(recession) j(quantile)

forvalues i = 1/4 {
replace bar_share`i' = 100 * bar_share`i'
}

*-------------------------------------------------------------------------------
* Plot
*-------------------------------------------------------------------------------

graph bar bar_share*, ///
	over(recession, relabel(1 `" "2007 to 2010" "Employment Loss" "' 2 `" "Jan to Apr 2020" "Employment Loss" "' 3 `" "Week 11 to Week 14 2020" "UI Claims" "')) ///
	yl(0 "0%" 10 "10%" 20 "20%" 30 "30%" ///
	,  labsize(medsmall) nogrid)  ytitle("Share of Employment Changes (%)") ///
	legend(order(1 "Bottom" 2 "Second" 3 "Third" 4 "Top") size(small) col(4) subtitle("Quartile of County Median Income", color(gs8))) ///
	${title_`version'}

gr_edit plotregion1.bars[1].style.editstyle shadestyle(color(oi1)) editcopy
gr_edit plotregion1.bars[1].style.editstyle linestyle(color(oi1)) editcopy
// bars[1] color

gr_edit plotregion1.bars[2].style.editstyle shadestyle(color(oi3)) editcopy
gr_edit plotregion1.bars[2].style.editstyle linestyle(color(oi3)) editcopy
// bars[2] color

gr_edit plotregion1.bars[3].style.editstyle shadestyle(color(oi4)) editcopy
gr_edit plotregion1.bars[3].style.editstyle linestyle(color(oi4)) editcopy
// bars[3] color

gr_edit plotregion1.bars[4].style.editstyle shadestyle(color(oi2)) editcopy
gr_edit plotregion1.bars[4].style.editstyle linestyle(color(oi2)) editcopy

oi_graph_export "${root}/results/Employment/Employment Loss by Income Quartile Great Recession vs COVID", type(${fig_type})

*-------------------------------------------------------------------------------
* Scalars for paper: employment losses in Q1 vs. Q4 during the Great Recession
*-------------------------------------------------------------------------------

sum bar_share1 if recession == "2010"
assert r(N) == 1
local emp_loss_2008_q1: di %3.1f r(mean)

sum bar_share4 if recession == "2010"
assert r(N) == 1
local emp_loss_2008_q4: di %3.1f r(mean)

*-------------------------------------------------------------------------------
* Stats on impact of Great Recession 2007-2010
*-------------------------------------------------------------------------------

project, uses("${root}/data/derived/BLS/county_employment_crisis.dta")
use "${root}/data/derived/BLS/county_employment_crisis.dta", clear

* County population
rename countyfips county_fips
project, uses("${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta")
merge 1:1 county_fips using "${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta", keepusing(pop_2014_2018_est) keep(3) nogen
rename county_fips countyfips
rename pop_2014_2018_est pop_2018
* CZ
merge 1:1 countyfips using "${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta", nogen keep(1 3) keepusing(cz)

* Collapse to CZ level
gcollapse (rawsum) pop_2018 (sum) employed_07 employed_10 [w = pop_2018], by(cz)

* Employment losses 2007-2010, percentages
gen emp_loss = (employed_10 - employed_07) / employed_07 * 100

* Quartiles of employment losses
gquantiles emp_loss_quartile = emp_loss [w = pop_2018], xtile nq(4)

* Employment losses in worst-hit and least-hit CZ quartiles
sum emp_loss [w = pop_2018] if emp_loss_quartile == 1
assert r(mean) < 0                                                              // worst-hit CZ quartile lost emp
local emp_loss_great_recession_worst: di %3.2f abs(r(mean))
sum emp_loss [w = pop_2018] if emp_loss_quartile == 4
assert r(mean) > 0                                                              // least-hit CZ quartile grew in emp
local emp_loss_great_recession_least: di %3.2f abs(r(mean))

* Output numbers for paper
cap erase "${root}/results/paper numbers/`category'/Great Recession CZ employment changes.yaml"

yamlout using "${root}/results/paper numbers/`category'/Great Recession CZ employment changes.yaml",  ///
	key("emp_loss_great_recession_worst") ///
	comment("Emp Loss (%) 2007-2010, Worst Hit CZs") ///
	value(`emp_loss_great_recession_worst') fmt(%9.2f)

yamlout using "${root}/results/paper numbers/`category'/Great Recession CZ employment changes.yaml",  ///
	key("emp_loss_great_recession_least") ///
	comment("Emp Loss (%) 2007-2010, Least Hit CZs") ///
	value(`emp_loss_great_recession_least') fmt(%9.2f)

yamlout using "${root}/results/paper numbers/`category'/Great Recession CZ employment changes.yaml",  ///
	key("emp_loss_2008_q1") ///
	comment("Emp Loss (%) 2007-2010, Q1 Income") ///
	value(`emp_loss_2008_q1') fmt(%2.0f)

yamlout using "${root}/results/paper numbers/`category'/Great Recession CZ employment changes.yaml",  ///
	key("emp_loss_2008_q4") ///
	comment("Emp Loss (%) 2007-2010, Q4 Income") ///
	value(`emp_loss_2008_q4') fmt(%2.0f)

project, creates("${root}/results/paper numbers/`category'/Great Recession CZ employment changes.yaml")
